##' ---
##' title: "2010 - 2014 CCES (3-Wave) Panel Data Analysis for Accounting for Pre-Treatment Exposure in Panel Data, Re-Estimating the Effect of Mass Public Shootings"
##' author: "Todd K. Hartman and Benjamin J. Newman"
##' date: "`r format(Sys.Date())`"
##' output: "github_document"
##' ---

##' Housekeeping
# install.packages("pacman")
# library('pacman')
# devtools::install_github("hrbrmstr/albersusa")
## Load packages
pacman::p_load(albersusa, data.table, geosphere, ggthemes,
               RStata, tidyverse, zipcode)

## Notes -- R Markdown doesn't play nicely with equal signs in comments, so they are
## replaced with '--'' or '->' so the file will render properly.
##
## There were 3 errors in our original mass shooting dataset -- the geocoding 
## for mass shooting id - 74 had the longitude and latitude reversed; for 
## id - 102, the wrong sign was used for longitude; for id - 222, the wrong 
## sign for latitude was used. We corrected these errors directly in the database.

## Import Mass Shootings Data
mps <- read.csv("Mass_Shootings_Data_v1.csv")

##' Plot the mass shootings on the map
## Format date
mps$date <- lubridate::dmy(mps$date)

## Create a copy of the mps data
mps.map <- mps

## For plotting purposes...
## Adjust points for Alaska
mps.map$lon[mps.map$id == "164"] <- -111.5000
mps.map$lat[mps.map$id == "164"] <- 26.1000

## Adjust points for Hawaii
mps.map$lon[mps.map$id == "149"] <- -104.5000
mps.map$lat[mps.map$id == "149"] <- 26.8000

## Draw the US map
usa_map <- usa_composite()  # US map with AK & HI
usa_map <- fortify(usa_map)  # Convert to data.frame for ggplot2

## Subset the data for each plot
fig1 <- labels <- gg <- vector(mode = "list", 3)
fig1[[1]] <- subset(mps.map, date > "2010-11-07" & date < "2012-10-02")
fig1[[1]][["pre.panel"]] <- ifelse(fig1[[1]][["date"]] > "2010-11-07", 0, 1)
fig1[[2]] <- subset(mps.map, date >= "2005-01-01" & date < "2012-10-02")
fig1[[2]][["pre.panel"]] <- ifelse(fig1[[2]][["date"]] > "2010-11-07", 0, 1)
fig1[[3]] <- subset(mps.map, date >= "2000-01-01" & date < "2012-10-02")
fig1[[3]][["pre.panel"]] <- ifelse(fig1[[3]][["date"]] > "2010-11-07", 0, 1)

## Label the plots
labels <- c("N = 17 shootings between panel waves",
            "N = 17 shootings between panel waves\n   w/ N = 35 pre-treatment exposures since 2005",
            "N = 17 shootings between panel waves\n   w/ N = 45 pre-treatment exposures since 2000"
            )

## Plot the events on the 50-state US map
for (i in 1:length(fig1)){
    gg[[i]] <- ggplot() + 
    geom_map(data = usa_map, map = usa_map,
                    aes(long, lat, map_id = id),
                    color = "#2b2b2b", size = 0.2, fill = NA) + 
    geom_point(data = fig1[[i]], aes(x = lon, y = lat, size = victims, 
                                     color = factor(fig1[[i]][["pre.panel"]])), 
               alpha = 1) +
    scale_color_manual(name = factor(fig1[[i]][["pre.panel"]]),
                       breaks = c("0", "1"), 
                       values = c("red4", "grey60"),
                       labels = c("Events b/w Panel Waves", 
                                      "Pre-Treatment Events")) +
    scale_size(range = c(2, 10)) +
    theme_map() +
    ggtitle(labels[[i]]) + 
    theme(plot.title = element_text(size = 16, face = "bold", hjust = 0.5),
          legend.position = "none")
    postscript(paste("mps_nice_figure_", i, ".eps", sep = ""))
    plot(gg[[i]])
    dev.off()
}

##' Data wrangling
## Remove mass shootings that occurred after CCES interviews began in 2012
mps.sub <- subset(mps, date < "2012-10-02", 
                  select = c("id", "date", "victims", "lon", "lat"))
rm(mps)

## Import 2010-2012-2014 (3-wave) CCES Panel data
load("CCES_3-Wave_Panel_10-12-14.rdata")

## [gun10, gun12] Gun control, old coding, 1 -> More Strict, 2 -> Less Strict, 3 -> Kept As They Are
## New coding 3 -> More Strict, 2 -> Same, 1 -> Less Strict
cces$gun10[cces$CC10_320 == 1] <- 3
cces$gun10[cces$CC10_320 == 2] <- 1
cces$gun10[cces$CC10_320 == 3] <- 2

cces$gun12[cces$CC12_320 == 1] <- 3
cces$gun12[cces$CC12_320 == 2] <- 1
cces$gun12[cces$CC12_320 == 3] <- 2

## Remove cases where there is missing data on the DV (at either wave)
completeFun <- function(data, desiredCols) {
    completeVec <- complete.cases(data[ , desiredCols])
    return(data[completeVec, ])
}

cces <- completeFun(cces, c("gun10", "gun12"))

## Check frequencies
prop.table(table(cces$gun10))
prop.table(table(cces$gun12))

## How many Rs did not change their views between panel waves?
prop.table(table(cces$gun10 == cces$gun12))

## How many shifted to gun control (positive scores -> more control)?
table(gun10 = cces$gun10, gun12 = cces$gun12)
prop.table(table(gun10 = cces$gun10, gun12 = cces$gun12), 1)

## [guns.d12] Differenced gun control measure 
## Positive scores = more gun control; negative -> less gun control
cces$guns.d12 <- cces$gun12 - cces$gun10
table(cces$guns.d12)
prop.table(table(cces$guns.d12))

## Check zip codes
length(cces$reszip_10)  # 2010 zip code
length(cces$reszip_12)  # 2012 zip code

## How many Rs provided the same residence zip code?
prop.table(table(cces$reszip_10 == cces$reszip_12))

## Length of time in years at current address (prior to 2010 wave)
summary(cces$addrlength_1_10)  # Years
summary(cces$addrlength_2_10)  # Months

## [dur.address] Calculate duration in years at address
cces$dur.address <- ifelse(is.na(cces$addrlength_2_10), cces$addrlength_1_10, 
                           ifelse(is.na(cces$addrlength_1_10), cces$addrlength_2_10,
                                  cces$addrlength_1_10 + cces$addrlength_2_10/12))
summary(cces$dur.address)
hist(cces$dur.address)

## [date.address] Date moved to residence zip code
## Calculated as interview start date - duration at address
date.add <- as.POSIXlt(as.Date(cces$starttime_10))
date.add$year <- date.add$year - cces$dur.add
date.add <- as.Date(date.add)
head(date.add)
table(is.na(as.Date(date.add)))  # How many missing dates?
cces$date.address <- date.add

## Remove Rs with missing duration at address
cces <- completeFun(cces, c("date.address"))

## [ideo, pid] Add in potential political moderators
cces$ideo <- ifelse(cces$ideo5_10 == 6, NA, cces$ideo5_10)  # [1 = very liberal; 5 = very conservative]
cces$pid <- ifelse(cces$pid7_10 == 8, NA, cces$pid7_10)  # [1 = strong Democrat; 7 = strong Republican]

## Subset the data -- keep only those who did not move between waves
cces.sub <- subset(cces, reszip_10 == reszip_12, 
                   select = c("caseid", "reszip_10", "date.address",
                              "dur.address", "gun10", "gun12", "guns.d12", 
                              "ideo", "pid"))
colnames(cces.sub) <- c("caseid", "zip", "date.address", "dur.address",
                        "gun10", "gun12", "guns.d12", "ideo", "pid")

## Now add the coordinates to the zip centroids in the CCES data
data(zipcode)
cces.gps <- merge(cces.sub, zipcode, by.x = 'zip', by.y = 'zip')
cces.gps <- subset(cces.gps, select = c("caseid", "longitude", "latitude", "date.address",
                                        "dur.address", "gun10", "gun12", "guns.d12", 
                                        "ideo", "pid"))
colnames(cces.gps) <- c("caseid", "lon_r", "lat_r", "date.address", "dur.address",
                        "gun10", "gun12", "guns.d12", "ideo", "pid")

## Prepare the CCES data for merging with mass shootings data
cces.1 <- cces.gps[rep(row.names(cces.gps), nrow(mps.sub)), ]  # Duplicate each row 142 times
cces.1 <- cces.1[order(cces.1$caseid), ]  # Sort by case id
mps.1 <- mps.sub[rep(row.names(mps.sub), nrow(cces.gps)), ]  # Duplicate each row 8513 times

## Convert the data.frame to a data.table
mps.dt <- setDT(mps.1)
cces.dt <- setDT(cces.1)

## Remove extraneous data
rm(cces)
rm(cces.gps)
rm(cces.1)
rm(mps.1)

## Combine the CCES and shooting data into one data table
cces.dt <- cbind(cces.dt, mps.dt)
rm(mps.dt)  # Remvoe the shooting data table

## Remove mps events that occurred prior to R residing at address
cces.dt <- subset(cces.dt, date >= date.address)

## [mi] Calculate distance in (miles) incorporating curvature of the Earth to ALL 142 events
cces.dt <- setDT(cces.dt)
cces.dt <- cces.dt[ , mi := distGeo(matrix(c(lon_r, lat_r), ncol = 2),   # ellipsoid distance
                                            matrix(c(lon, lat), ncol = 2))/1609]  # convert km to miles

## [panel] Create identifier for events occurring b/w panel waves
cces.dt$mps.panel <- ifelse(cces.dt$date > "2010-11-07" & cces.dt$date < "2012-10-02", 1, 0)

## Descriptives for distances occurring b/w CCES panel waves (n -> 17)
summary(cces.dt$mi[cces.dt$mps.panel == 1])

## Rank the distances for each respondent from nearest to farthest
## for ALL shootings that occurred (n -> 142)
cces.dt <- cces.dt %>%
    group_by(caseid) %>%
    arrange(mi) %>%
    mutate(rank.mi.all = order(mi))

## Now rank the distances for each respondent from nearest to farthest
## for shootings that occurred b/w panel waves (n -> 17) & prior events (n -> 125)
cces.dt <- cces.dt %>%
    group_by(mps.panel, caseid) %>%
    arrange(mi) %>%
    mutate(rank.mi = order(mi))

cces.dt <- cces.dt[with(cces.dt, order(caseid, -mps.panel)), ]  # Sort the data

## Create treatment - within 100 miles of ANY mass shooting between panel waves
cces.dt$treat <- ifelse(cces.dt$mps.panel == 1 & cces.dt$mi <= 100, 1, 0)

## Merge with panel data
cces.sub <- left_join(cces.sub, subset(cces.dt, mps.panel == 1 & rank.mi == 1, 
                                       select = c("caseid", "treat", "victims")), 
                      by = "caseid")

summary(cces.sub$treat)
names(cces.sub)[names(cces.sub) == 'victims'] <- 'treat.vics'  #  Rename victims col

## How many Rs exposed to ANY mps within 100 miles b/w panel waves?
table(cces.sub$treat)

## Of ANY WITHIN 100 miles, how many changed gun attitudes?
summary(cces.sub$guns.d12[(cces.sub$treat == 1)])  # Mean change (plus how many missing)
table(cces.sub$guns.d12[(cces.sub$treat == 1)])  # How did Rs change?

## Of ANY BEYOND 100 miles, how many changed gun attitudes?
summary(cces.sub$guns.d12[(cces.sub$treat == 0)])  # Mean change (plus how many missing)
table(cces.sub$guns.d12[(cces.sub$treat == 0)])  # How did Rs change?

## Identify pre-treatment exposures
## ANY mass shooting b/w 2010-2012 within 100 miles
## AND NOT within 100 miles of other events that occurred prior to panel
yrs <- c("1966-01-01", "2000-01-01", "2005-01-01")  # Date thresholds
yrs.labels <- c("66", "00", "05")
tmp.df <- tmp.name1 <- tmp.name2 <- tmp.name3 <- pre100 <- vector(mode = "list", length(yrs))
for (m in 1:length(yrs)){
    tmp.name1 <- "caseid"
    tmp.name2 <- paste("pre", yrs.labels[[m]], sep = "")  # Define variable name for prior exposure
    tmp.name3 <- paste("treat", yrs.labels[[m]], sep = "")  # Define variable name for prior exposure
    tmp.df[[m]] <- subset(cces.dt, select = c("caseid", "date", "mi", "treat"))  # Temporary dataframe
    tmp.df[[m]]$pre100 <- ifelse(cces.dt$mi <= 100 & 
                                (cces.dt$date >= yrs[[m]] & cces.dt$date < "2010-10-01"),
                               1, 0)  # Exposed during these dates
    pre100[[m]] <- aggregate(pre100 ~ caseid, data = tmp.df[[m]], sum)  # Count prior exposures
    colnames(pre100[[m]]) <- c(tmp.name1, tmp.name2)  # Rename variables before merging
    cces.sub <- left_join(cces.sub, pre100[[m]], by = "caseid")  # Merge with CCES
    cces.sub[[tmp.name3]] <- ifelse(cces.sub[[tmp.name2]] == 0 & cces.sub$treat == 1, 1, 0)  # No prior exp.
}

## Create Nearest - NEAREST mass shooting occurred within 100
cces.dt$nearest <- ifelse(cces.dt$rank.mi.all == 1 & cces.dt$mi <= 100, 1, 0)

## Create NH treatment - NEAREST mass shooting occurred within 100 BETWEEN panel waves
cces.dt$treat.nearest <- ifelse(cces.dt$mps.panel == 1 & cces.dt$nearest, 1, 0)

## Merge with panel data
cces.sub <- left_join(cces.sub, subset(cces.dt, rank.mi.all == 1, 
                                       select = c("caseid", "treat.nearest", "victims")), 
                      by = "caseid")

summary(cces.sub$treat.nearest)
names(cces.sub)[names(cces.sub) == 'victims'] <- 'treat.nearest.vics'  #  Rename victims col

## How many Rs exposed to NEAREST mps within 100 miles b/w panel waves?
table(cces.sub$treat.nearest)

## Reshape the data from wide to long to account for panel structure of the data
df <- subset(cces.sub, select = c("caseid", "gun10", "gun12", "ideo", "pid", 
                                  "pre00", "treat00", "pre05", "treat05", 
                                  "treat", "treat.vics", 
                                  "treat.nearest", "treat.nearest.vics"))

df <- df %>%
        gather(year, gun, -c("caseid", "ideo", "pid", 
                             "pre00", "treat00", "pre05", "treat05", 
                             "treat", "treat.vics", 
                             "treat.nearest", "treat.nearest.vics"))
df$year <- ifelse(df$year == "gun10", "2010", "2012")

write.csv(df, file = "df_3-wave.csv", row.names = FALSE)  # Write the data
write_dta(df, "df_3-wave.dta", version = 15)

##' Data analysis
## RStata to call panel methods directly within STATA (on Mac OSX models)
options("RStata.StataPath" = 
            '/Applications/Stata/StataSE.app/Contents/MacOS/stata-se')  # On Mac OSX, change path for Windows
options("RStata.StataVersion" = 15)
stata("set more off, permanently")

stata_code <- "
** Convert strings to numeric
destring caseid - gun, force replace 

** Set as panel data
xtset caseid year

** Difference-in-difference estimators (random effects ordered logit)
xtologit gun i.year##i.treat_nearest, vce(cluster caseid) level(90)
xtologit gun i.year##i.treat_nearest, vce(cluster caseid) level(90) or

xtologit gun i.year##i.treat, vce(cluster caseid) level(90)
xtologit gun i.year##i.treat, vce(cluster caseid) level(90) or

xtologit gun i.year##i.treat05, vce(cluster caseid) level(90)
xtologit gun i.year##i.treat05, vce(cluster caseid) level(90) or

xtologit gun i.year##i.treat00, vce(cluster caseid) level(90)
xtologit gun i.year##i.treat00, vce(cluster caseid) level(90) or

** Blow-Up and Cluster Fixed Effects Model
** https://www.statalist.org/forums/forum/general-stata-discussion/general/485879-coding-a-fixed-effects-estimator-where-response-is-an-ordinal-variable-blow-up-and-cluster
capture program drop bucologit
program bucologit
version 11.2
syntax varlist [if] [in], Id(varname)

preserve

marksample touse
markout `touse' `id'

gettoken yraw x : varlist
tempvar y
qui egen int `y' = group(`yraw')

qui keep `y' `x' `id' `touse'
qui keep if `touse'

qui sum `y'
local ymax = r(max)
forvalues i = 2(1)`ymax' {
qui gen byte `yraw'`i' = `y' >= `i'
}
drop `y'

tempvar n cut newid
qui gen long `n' = _n
qui reshape long `yraw', i(`n') j(`cut')
qui egen long `newid' = group(`id' `cut')
sort `newid'
clogit `yraw' `x', group(`newid') cluster(`id') level(90)

restore            
end

** Fixed effects ordered logit models
gen treatnear_buc = treat_nearest
replace treatnear_buc = 0 if year == 2010

bucologit gun year treatnear_buc, id(caseid)
di exp(.0338415)
di exp(.2774351)
di exp(.5210286)

gen treat_buc = treat
replace treat_buc = 0 if year == 2010
bucologit gun year treat_buc, id(caseid)
di exp(-.2030424)
di exp(-.0260636)
di exp(.1509153)

gen treat05_buc = treat05
replace treat05_buc = 0 if year == 2010
bucologit gun year treat05_buc, id(caseid)
di exp(.0639368)
di exp(.3413253)
di exp(.6187138)

gen treat00_buc = treat00
replace treat00_buc = 0 if year == 2010
bucologit gun year treat00_buc, id(caseid)
di exp(-.1466761)
di exp(.1604182)
di exp(.4675125)
"
stata(stata_code, data.in = df, data.out = TRUE)

